Inspired by Roberto Pontes' blog Neuroinvest
REQUIREMENTS
$ pip install yahoo-finance
TODO
REFERENCES
In [1]:
%matplotlib inline
%pylab inline
pylab.rcParams['figure.figsize'] = (14, 9)
In [2]:
import pandas as pd
import json
import matplotlib.pyplot as plt
from matplotlib import style
from yahoo_finance import Share
from pprint import pprint
from datetime import datetime, timedelta
from datetime import date
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from keras.wrappers.scikit_learn import KerasRegressor
In [3]:
style.use('fivethirtyeight')
In [4]:
# Rolling lookback window to date (note: 133 days, even though the variable names still say "5 years")
days_to_subtract = 133
today_is = datetime.today()
five_years_to_date_is = datetime.today() - timedelta(days=days_to_subtract)
print("Today is: " + str(today_is.date()) + " and the lookback start is: " + str(five_years_to_date_is.date()))
In [5]:
wege = Share('WEGE3.SA')
In [6]:
hoje = str(today_is.year) + "/" + str(today_is.month) + "/" + str(today_is.day)
cinco_anos = str(five_years_to_date_is.year) + "/" + str(five_years_to_date_is.month) + "/" + str(five_years_to_date_is.day)
hoje = date(*map(int, hoje.split('/')))
cinco_anos = date(*map(int, cinco_anos.split('/')))
hoje = str(hoje)
cinco_anos = str(cinco_anos)
In [7]:
df_wege = wege.get_historical(cinco_anos, hoje)
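Each record returned by get_historical() is a dict of strings; the shape below is illustrative only (hypothetical values, not actual output):
# pprint(df_wege[0]) shows something like:
# {'Adj_Close': '15.10', 'Close': '15.30', 'Date': '2017-01-02',
#  'High': '15.45', 'Low': '15.05', 'Open': '15.20',
#  'Symbol': 'WEGE3.SA', 'Volume': '1234500'}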
In [8]:
# get_historical() returns a list of dicts of strings; a JSON round-trip lets pandas infer numeric and date dtypes
df_wege = json.dumps(df_wege)
In [9]:
df_wege = pd.read_json(df_wege)
In [10]:
# Yahoo returns newest-first; sort ascending and reset the index so the positional loops below line up
df_wege = df_wege.sort_values(['Date'], ascending=[1]).reset_index(drop=True)
In [11]:
# Indicator lookback window (trading days)
n = 15
In [12]:
def MA(df, n):
    """
    Moving Average
    """
    # Named so it does not collide with the 'Close' column in the feature concat below
    result = pd.Series(df['Close'].rolling(window=n, center=False).mean(), name='MA_' + str(n))
    return result
In [13]:
plt.xticks(rotation=90)
plt.plot(df_wege.Date, df_wege['Close']
,df_wege.Date, MA(df_wege, n)
)
Out[13]:
In [14]:
def BBANDS(df, n):
    """
    Bollinger Bands
    """
    MA = df['Close'].rolling(window=n, center=False).mean()
    MSD = df['Close'].rolling(window=n, center=False).std()
    UpperBollinger = MA + (MSD * 2)
    LowerBollinger = MA - (MSD * 2)
    B1 = pd.Series(UpperBollinger, name='UpperBollinger_' + str(n))
    B2 = pd.Series(LowerBollinger, name='LowerBollinger_' + str(n))
    result = pd.DataFrame([B1, B2]).transpose()
    return result
In [15]:
plt.xticks(rotation=90)
plt.plot(df_wege.Date, df_wege['Close']
,df_wege.Date, BBANDS(df_wege, n).UpperBollinger_15
,df_wege.Date, BBANDS(df_wege, n).LowerBollinger_15
)
Out[15]:
In [16]:
def STOK(df):
    """
    Stochastic oscillator %K
    """
    result = pd.Series((df['Close'] - df['Low']) / (df['High'] - df['Low']), name='SO%k')
    return result
In [17]:
def STO(df, n):
    """
    Stochastic oscillator %D
    """
    SOk = pd.Series((df['Close'] - df['Low']) / (df['High'] - df['Low']), name='SO%k')
    result = pd.Series(SOk.ewm(span=n, min_periods=n - 1).mean(), name='SO%d_' + str(n))
    return result
In [18]:
def SMA(df, n):
    """
    Smoothed Moving Average
    """
    # Named to avoid a duplicate 'Close' column in the feature concat below
    result = pd.Series(df['Close'].rolling(window=n, min_periods=n).mean(), name='SMA_' + str(n))
    return result
In [19]:
def EMA(df, n):
    """
    Exponential Moving Average
    """
    result = pd.Series(df['Close'].ewm(span=n, min_periods=n - 1).mean(), name='EMA_' + str(n))
    return result
In [20]:
def MOM(df, n):
    """
    Momentum
    """
    result = pd.Series(df['Close'].diff(n), name='Momentum_' + str(n))
    return result
In [21]:
def ROC(df, n):
    """
    Rate of Change
    """
    M = df['Close'].diff(n - 1)
    N = df['Close'].shift(n - 1)
    result = pd.Series(M / N, name='ROC_' + str(n))
    return result
In [22]:
def STDDEV(df, n):
    """
    Standard Deviation
    """
    result = pd.Series(df['Close'].rolling(window=n).std(), name='STD_' + str(n))
    return result
In [23]:
def KELCH(df, n):
    """
    Keltner Channel
    """
    KelChM = pd.Series(((df['High'] + df['Low'] + df['Close']) / 3).rolling(window=n).mean(), name='KelChM_' + str(n))
    KelChU = pd.Series(((4 * df['High'] - 2 * df['Low'] + df['Close']) / 3).rolling(window=n).mean(), name='KelChU_' + str(n))
    KelChD = pd.Series(((-2 * df['High'] + 4 * df['Low'] + df['Close']) / 3).rolling(window=n).mean(), name='KelChD_' + str(n))
    result = pd.DataFrame([KelChM, KelChU, KelChD]).transpose()
    return result
In [24]:
def CCI(df, n):
    """
    Commodity Channel Index
    """
    PP = (df['High'] + df['Low'] + df['Close']) / 3
    result = pd.Series((PP - PP.rolling(window=n).mean()) / PP.rolling(window=n).std(), name='CCI_' + str(n))
    return result
In [25]:
def EOM(df, n):
    """
    Ease of Movement
    """
    EoM = (df['High'].diff(1) + df['Low'].diff(1)) * (df['High'] - df['Low']) / (2 * df['Volume'])
    result = pd.Series(EoM.rolling(window=n).mean(), name='EoM_' + str(n))
    return result
In [26]:
def FORCE(df, n):
    """
    Force Index
    """
    result = pd.Series(df['Close'].diff(n) * df['Volume'].diff(n), name='Force_' + str(n))
    return result
In [27]:
def Chaikin(df):
    """
    Chaikin Oscillator
    """
    ad = (2 * df['Close'] - df['High'] - df['Low']) / (df['High'] - df['Low']) * df['Volume']
    result = pd.Series(ad.ewm(span=3, min_periods=2).mean() - ad.ewm(span=10, min_periods=9).mean(), name='Chaikin')
    return result
In [28]:
def ACCDIST(df, n):
    """
    Accumulation/Distribution
    """
    ad = (2 * df['Close'] - df['High'] - df['Low']) / (df['High'] - df['Low']) * df['Volume']
    M = ad.diff(n - 1)
    N = ad.shift(n - 1)
    ROC = M / N
    result = pd.Series(ROC, name='Acc/Dist_ROC_' + str(n))
    return result
In [29]:
def TSI(df, r, s):
    """
    True Strength Index
    r = EMA smoothing period for momentum, typically 25
    s = EMA smoothing period for smoothed momentum, typically 13
    """
    M = pd.Series(df['Close'].diff(1))
    aM = abs(M)
    EMA1 = pd.Series(M.ewm(span=r, min_periods=r - 1).mean())
    aEMA1 = pd.Series(aM.ewm(span=r, min_periods=r - 1).mean())
    EMA2 = pd.Series(EMA1.ewm(span=s, min_periods=s - 1).mean())
    aEMA2 = pd.Series(aEMA1.ewm(span=s, min_periods=s - 1).mean())
    result = pd.Series(EMA2 / aEMA2, name='TSI_' + str(r) + '_' + str(s))
    return result
In [30]:
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4):
    """
    KST Oscillator
    """
    ROC1 = df['Close'].diff(r1 - 1) / df['Close'].shift(r1 - 1)
    ROC2 = df['Close'].diff(r2 - 1) / df['Close'].shift(r2 - 1)
    ROC3 = df['Close'].diff(r3 - 1) / df['Close'].shift(r3 - 1)
    ROC4 = df['Close'].diff(r4 - 1) / df['Close'].shift(r4 - 1)
    result = pd.Series(ROC1.rolling(window=n1).sum()
                       + ROC2.rolling(window=n2).sum() * 2
                       + ROC3.rolling(window=n3).sum() * 3
                       + ROC4.rolling(window=n4).sum() * 4,
                       name='KST_' + str(r1) + '_' + str(r2) + '_' + str(r3) + '_' + str(r4) + '_' + str(n1) + '_' + str(n2) + '_' + str(n3) + '_' + str(n4))
    return result
In [31]:
def MassI(df):
    """
    Mass Index
    """
    Range = df['High'] - df['Low']
    EX1 = Range.ewm(span=9, min_periods=8).mean()
    EX2 = EX1.ewm(span=9, min_periods=8).mean()
    Mass = EX1 / EX2
    result = pd.Series(Mass.rolling(window=25).sum(), name='Mass Index')
    return result
In [32]:
def MACD(df, n_fast, n_slow):
    """
    MACD, MACD Signal and MACD difference
    """
    EMAfast = pd.Series(df['Close'].ewm(span=n_fast, min_periods=n_slow - 1).mean())
    EMAslow = pd.Series(df['Close'].ewm(span=n_slow, min_periods=n_slow - 1).mean())
    MACD = pd.Series(EMAfast - EMAslow, name='MACD_%d_%d' % (n_fast, n_slow))
    MACDsign = pd.Series(MACD.ewm(span=9, min_periods=8).mean(), name='MACDsign_%d_%d' % (n_fast, n_slow))
    MACDdiff = pd.Series(MACD - MACDsign, name='MACDdiff_%d_%d' % (n_fast, n_slow))
    result = pd.DataFrame([MACD, MACDsign, MACDdiff]).transpose()
    return result
In [33]:
def TRIX(df, n):
    """
    Trix
    """
    EX1 = df['Close'].ewm(span=n, min_periods=n - 1).mean()
    EX2 = EX1.ewm(span=n, min_periods=n - 1).mean()
    EX3 = EX2.ewm(span=n, min_periods=n - 1).mean()
    ROC_l = [0]
    for i in range(len(df) - 1):
        ROC_l.append((EX3.iloc[i + 1] - EX3.iloc[i]) / EX3.iloc[i])
    result = pd.Series(ROC_l, name='Trix_' + str(n))
    return result
In [34]:
def ULTOSC(df):
    """
    Ultimate Oscillator
    """
    TR_l = [0]
    BP_l = [0]
    for i in range(len(df) - 1):
        TR = max(df['High'].iloc[i + 1], df['Close'].iloc[i]) - min(df['Low'].iloc[i + 1], df['Close'].iloc[i])
        TR_l.append(TR)
        BP = df['Close'].iloc[i + 1] - min(df['Low'].iloc[i + 1], df['Close'].iloc[i])
        BP_l.append(BP)
    BP_s = pd.Series(BP_l)
    TR_s = pd.Series(TR_l)
    result = pd.Series(4 * BP_s.rolling(7).sum() / TR_s.rolling(7).sum()
                       + 2 * BP_s.rolling(14).sum() / TR_s.rolling(14).sum()
                       + BP_s.rolling(28).sum() / TR_s.rolling(28).sum(),
                       name='Ultimate_Osc')
    return result
In [35]:
def PPSR(df):
    """
    Pivot Points, Supports and Resistances
    """
    PP = pd.Series((df['High'] + df['Low'] + df['Close']) / 3)
    R1 = pd.Series(2 * PP - df['Low'])
    S1 = pd.Series(2 * PP - df['High'])
    R2 = pd.Series(PP + df['High'] - df['Low'])
    S2 = pd.Series(PP - df['High'] + df['Low'])
    R3 = pd.Series(df['High'] + 2 * (PP - df['Low']))
    S3 = pd.Series(df['Low'] - 2 * (df['High'] - PP))
    result = pd.DataFrame([PP, R1, S1, R2, S2, R3, S3]).transpose()
    return result
In [36]:
def ATR(df, n):
    """
    Average True Range
    """
    TR_l = [0]
    for i in range(len(df) - 1):
        # True range; note the corrected max()/min() parenthesization (a closing paren was misplaced)
        TR = max(df['High'].iloc[i + 1], df['Close'].iloc[i]) - min(df['Low'].iloc[i + 1], df['Close'].iloc[i])
        TR_l.append(TR)
    TR_s = pd.Series(TR_l)
    result = pd.Series(TR_s.ewm(span=n, min_periods=n).mean(), name='ATR_' + str(n))
    return result
In [37]:
def Vortex(df, n):
    """
    Vortex Indicator
    """
    TR = [0]
    VM = [0]
    for i in range(len(df) - 1):
        TR.append(max(df['High'].iloc[i + 1], df['Close'].iloc[i]) - min(df['Low'].iloc[i + 1], df['Close'].iloc[i]))
        VM.append(abs(df['High'].iloc[i + 1] - df['Low'].iloc[i]) - abs(df['Low'].iloc[i + 1] - df['High'].iloc[i]))
    result = pd.Series(pd.Series(VM).rolling(n).sum() / pd.Series(TR).rolling(n).sum(), name='Vortex_' + str(n))
    return result
In [38]:
def RSI(df, n):
    """
    Relative Strength Index (on a 0-1 scale)
    """
    UpI = [0]
    DoI = [0]
    for i in range(len(df) - 1):
        UpMove = df['High'].iloc[i + 1] - df['High'].iloc[i]
        DoMove = df['Low'].iloc[i] - df['Low'].iloc[i + 1]
        UpI.append(UpMove if UpMove > DoMove and UpMove > 0 else 0)
        DoI.append(DoMove if DoMove > UpMove and DoMove > 0 else 0)
    UpI = pd.Series(UpI)
    DoI = pd.Series(DoI)
    PosDI = UpI.ewm(span=n, min_periods=n - 1).mean()
    NegDI = DoI.ewm(span=n, min_periods=n - 1).mean()
    result = pd.Series(PosDI / (PosDI + NegDI), name='RSI_' + str(n))
    return result
In [39]:
def MFI(df, n):
    """
    Money Flow Index and Ratio
    """
    PP = (df['High'] + df['Low'] + df['Close']) / 3
    PosMF = [0]
    for i in range(len(df) - 1):
        if PP.iloc[i + 1] > PP.iloc[i]:
            PosMF.append(PP.iloc[i + 1] * df['Volume'].iloc[i + 1])
        else:
            PosMF.append(0)
    PosMF = pd.Series(PosMF)
    TotMF = PP * df['Volume']
    MFR = pd.Series(PosMF / TotMF)
    result = pd.Series(MFR.rolling(window=n).mean(), name='MFI_' + str(n))
    return result
In [40]:
def OBV(df, n):
    """
    On-balance Volume
    """
    OBV = [0]
    for i in range(len(df) - 1):
        delta = df['Close'].iloc[i + 1] - df['Close'].iloc[i]
        if delta > 0:
            OBV.append(df['Volume'].iloc[i + 1])
        elif delta == 0:
            OBV.append(0)
        else:
            OBV.append(-df['Volume'].iloc[i + 1])
    OBV = pd.Series(OBV)
    result = pd.Series(OBV.rolling(window=n).mean(), name='OBV_' + str(n))
    return result
In [41]:
def COPP(df, n):
    """
    Coppock Curve
    """
    M = df['Close'].diff(int(n * 11 / 10) - 1)
    N = df['Close'].shift(int(n * 11 / 10) - 1)
    ROC1 = M / N
    M = df['Close'].diff(int(n * 14 / 10) - 1)
    N = df['Close'].shift(int(n * 14 / 10) - 1)
    ROC2 = M / N
    result = pd.Series((ROC1 + ROC2).ewm(span=n, min_periods=n).mean(), name='Copp_' + str(n))
    return result
In [42]:
def ADX(df, n, n_ADX):
    """
    Average Directional Movement Index
    """
    UpI = []
    DoI = []
    TR_l = [0]
    for i in range(len(df) - 1):
        UpMove = df['High'].iloc[i + 1] - df['High'].iloc[i]
        DoMove = df['Low'].iloc[i] - df['Low'].iloc[i + 1]
        UpI.append(UpMove if UpMove > DoMove and UpMove > 0 else 0)
        DoI.append(DoMove if DoMove > UpMove and DoMove > 0 else 0)
        TR_l.append(max(df['High'].iloc[i + 1], df['Close'].iloc[i]) - min(df['Low'].iloc[i + 1], df['Close'].iloc[i]))
    TR_s = pd.Series(TR_l)
    ATR = TR_s.ewm(span=n, min_periods=n).mean()
    UpI = pd.Series(UpI)
    DoI = pd.Series(DoI)
    PosDI = UpI.ewm(span=n, min_periods=n - 1).mean() / ATR
    NegDI = DoI.ewm(span=n, min_periods=n - 1).mean() / ATR
    result = pd.Series((abs(PosDI - NegDI) / (PosDI + NegDI)).ewm(span=n_ADX, min_periods=n_ADX - 1).mean(),
                       name='ADX_' + str(n) + '_' + str(n_ADX))
    return result
In [43]:
# The Donchian Channel below raised an error (max() over a Series slice), so it stays commented out;
# a working rolling-window sketch follows after this cell.
# def DONCH(df, n):
# """
# Donchian Channel
# """
# i = 0
# DC_l = []
# while i < n - 1:
# DC_l.append(0)
# i = i + 1
# i = 0
# while i + n - 1 < df.index[-1]:
# DC = max(df['High'].ix[i:i + n - 1]) - min(df['Low'].ix[i:i + n - 1])
# DC_l.append(DC)
# i = i + 1
# DonCh = pd.Series(DC_l, name = 'Donchian_' + str(n))
# DonCh = DonCh.shift(n - 1)
# result = df.join(DonCh)
# return (result)
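A minimal working alternative using pandas rolling max/min, assuming the same df/n conventions as the other indicators (a sketch, not the original author's implementation):
def DONCH(df, n):
    """
    Donchian Channel (rolling-window sketch replacing the commented loop above)
    """
    # Channel width: highest high minus lowest low over the last n bars
    result = pd.Series(df['High'].rolling(window=n).max() - df['Low'].rolling(window=n).min(),
                       name='Donchian_' + str(n))
    return result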
In [44]:
df = df_wege
In [45]:
result = pd.concat([df
,STOK(df)
,MassI(df)
,ULTOSC(df)
,PPSR(df)
,STO(df, n)
,MA(df, n)
,BBANDS(df, n)
,SMA(df, n)
,EMA(df, n)
,MOM(df, n)
,ROC(df, n)
,STDDEV(df, n)
,KELCH(df, n)
,CCI(df, n)
,EOM(df, n)
,FORCE(df, n)
,Chaikin(df)
,ACCDIST(df, n)
,TRIX(df, n)
,ATR(df, n)
,Vortex(df, n)
,RSI(df, n)
,MFI(df, n)
,OBV(df, n)
,COPP(df, n)
,TSI(df, 25, 13)
,ADX(df, n, 25)
,MACD(df, 15, 60)
,KST(df, 10, 15, 20, 30, 1, 2, 3, 4)]
,axis=1)
In [46]:
result
Out[46]:
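The rolling warm-up periods leave NaNs at the top of result; a quick check before modeling (a convenience sketch, not in the original):
# Count missing values per engineered column (rolling warm-up rows are NaN)
result.isnull().sum().sort_values(ascending=False).head(10)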
In [47]:
result.corr()["Close"]
Out[47]:
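To see which engineered features track Close most closely, the same correlations can be ranked by absolute value (a convenience sketch):
result.corr()['Close'].abs().sort_values(ascending=False).head(15)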
In [48]:
import keras
import os
import numpy as np
import pandas as pd
import pydot
import graphviz
from sklearn import preprocessing
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers import Dense
from keras.layers.core import Dropout
from keras.optimizers import SGD
In [49]:
result.shape
Out[49]:
In [50]:
# Dataset with independent variables (columns 3-5 and 7-47; skips Adj_Close, Close, Date, Symbol)
X = result.iloc[:, [3, 4, 5] + list(range(7, 48))]
# Dataset with the dependent variable (Adjusted Close)
Y = result.iloc[:, [0]]
In [51]:
# Replace the rolling warm-up NaNs with zeros
X = X.fillna(0)
In [52]:
# Cast features to int64 (note: this truncates fractional values)
X = X.astype(np.int64)
In [53]:
X.isnull().any()
Out[53]:
In [54]:
X.High.dtype
Out[54]:
In [55]:
train_x, test_x, train_y, test_y = train_test_split(X, Y, test_size=0.2, random_state=0 )
In [56]:
# Convert pandas datasets to NumPy arrays
train_x = train_x.values
test_x = test_x.values
train_y = train_y.values
test_y = test_y.values
In [57]:
# Standardize the data attributes
train_x = preprocessing.scale(train_x)
test_x = preprocessing.scale(test_x)
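Note that preprocessing.scale() standardizes the train and test sets with their own statistics; a common alternative fits a StandardScaler on the training split only and reuses it on the test split. A sketch of that alternative (not what the cell above does):
# from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler().fit(train_x)   # learn mean/std from the training split only
# train_x = scaler.transform(train_x)
# test_x = scaler.transform(test_x)        # reuse the training statistics on the test split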
In [58]:
# Set seed
np.random.seed(12345)
In [59]:
# The shape of the data (# Records, # Columns)
print (train_x.shape)
print (test_x.shape)
print (train_y.shape)
print (test_y.shape)
In [60]:
# Setup of constants
# Number of input features (columns of X)
input_dim = X.shape[1]
# Constants of dense()
neurons_layer_01 = input_dim
neurons_layer_02 = 200
neurons_layer_03 = 200
neurons_layer_04 = 200
neurons_layer_05 = 200
neurons_output = 1
activation_output = 'relu'
# Constants of dropout()
dropout = 0.2
# Constants of compile() | To use Stochastic Gradient Descent instead, pass the 'sgd' object below as the optimizer
loss = 'mean_squared_error'
optimizer = 'adam'
metrics = ['accuracy']  # note: accuracy is not very informative for a regression task
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
# Constants of fit()
validation_split = 0.2
epochs = 500
batch_size = 3
verbose = 0
shuffle = True
initial_epoch = 0
In [61]:
# First we'll create a sequential model, stacking the layers of our network topology
model = Sequential()
model.add(Dense(input_dim, input_dim=input_dim, activation='relu')) # First hidden layer
model.add(Dense(neurons_layer_02, kernel_initializer = 'normal', activation='relu')) # Second hidden layer
model.add(Dense(neurons_layer_03, kernel_initializer = 'normal', activation='relu')) # Third hidden layer
model.add(Dropout(dropout)) # Dropout layer
model.add(Dense(neurons_layer_04, kernel_initializer = 'normal', activation='relu')) # Fourth hidden layer
model.add(Dense(neurons_layer_05, kernel_initializer = 'normal', activation='relu')) # Fifth hidden layer
model.add(Dense(neurons_output, activation=activation_output))
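Since pydot and graphviz are imported above, the topology can also be rendered to an image file; a sketch (assumes the Graphviz binaries are installed on the system):
# from keras.utils import plot_model
# plot_model(model, to_file='model.png', show_shapes=True)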
In [62]:
# To compile the model we need to set 1) a loss function, 2) an optimizer, and 3) metrics to assess model quality
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
In [63]:
# Fit the model
model.fit( train_x
,train_y
,validation_split = validation_split
,epochs = epochs
,batch_size = batch_size
,verbose = verbose
,shuffle = shuffle
,initial_epoch = initial_epoch
)
Out[63]:
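model.fit() also returns a History object; capturing it gives a quick look at the loss curves. A sketch, assuming the fit call above is rewritten as history = model.fit(...):
# history = model.fit(...)               # capture the return value of the fit above
# plt.plot(history.history['loss'], label='train loss')
# plt.plot(history.history['val_loss'], label='validation loss')
# plt.legend()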
In [64]:
model.summary()
In [65]:
pred_y = model.predict(test_x)
r2 = r2_score(test_y, pred_y)
rmse = np.sqrt(mean_squared_error(test_y, pred_y))  # mean_squared_error returns MSE; take the square root for RMSE
print("Performance \n\nR2   : {0:f}\nRMSE : {1:f}".format(r2, rmse))
In [66]:
data = np.concatenate((test_y, pred_y, test_y - pred_y, 100 - (test_y / pred_y) * 100), axis=1)
df = pd.DataFrame(data)
df.columns = ['y', 'y_hat', 'Error', '%Diff']
df
Out[66]:
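model_from_json is imported above but never used; a minimal save/load round trip with it looks like this ('model.json' and 'model.h5' are illustrative filenames):
# Serialize the architecture to JSON and the weights to HDF5
with open('model.json', 'w') as f:
    f.write(model.to_json())
model.save_weights('model.h5')

# Later: rebuild the model and reload the weights
with open('model.json') as f:
    loaded_model = model_from_json(f.read())
loaded_model.load_weights('model.h5')
loaded_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)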